* Load the wide-format CCES panel file (waves suffixed 10, 12 and 14 further down).
* use takes the option clear to discard the dataset currently in memory;
* replace is not an option of use and stops the do-file with a syntax error.
use /Users/sierravaldez/Downloads/CCES_Panel_Full3waves_VV_V4.dta, clear

*reshape wide to long
* the row number is used as the respondent identifier for reshape and tsset
* (assumes the wide file holds one row per respondent - TODO confirm)
gen id = _n

**move the CCyy wave prefix to a _yy suffix (CC10<rest> becomes <rest>_10, likewise for 12 and 14)
**so the wave number sits at the end of each name for the reshape
foreach yr in 10 12 14 {
    rename CC`yr'* *_`yr'
}

**correcting for different variable sets so I can analyze these
* These three columns are created by hand for wave 10 so that every wave carries
* the same stubs into the reshape.
* The two 423 placeholders are set to missing rather than 0. A 0 survives the
* later 8/9 and 1-5 recodes untouched, so it entered hispanic_resentment as a
* real score, pulled the scale minimum down to 0, and gave every wave-10 row a
* rescaled Hispanic resentment of exactly 0.
generate _423aa_10 = .
generate _423bb_10 = .
* hispanic_post_10 keeps the -1 sentinel; hispanic_post_ is not used in any
* calculation later in this file
generate hispanic_post_10 = -1

**turn numbered sub-items into lettered ones: <stub>_1_<yr> ... <stub>_4_<yr> become <stub>_a_<yr> ... <stub>_d_<yr>
// letters listed in the order of item numbers 1-4
local item_letters a b c d

// stubs whose sub-items are numbered
local numbered_stubs _322

foreach s of local numbered_stubs {
    // the panel waves are 10, 12 and 14
    foreach w in 10 12 14 {
        forvalues k = 1/4 {
            local let : word `k' of `item_letters'
            // capture suppresses any rename error, for example when the source variable is absent in this wave
            capture rename `s'_`k'_`w' `s'_`let'_`w'
        }
    }
}

**remove the underscore in front of the item letter: <stub>_a_<yr> becomes <stub>a_<yr>
**this gives the stub names (_322a_ ... _322d_) that the reshape expects
// stubs to process
local lettered_stubs _322

foreach s of local lettered_stubs {
    foreach let in a b c d {
        foreach w in 10 12 14 {
            // capture suppresses any rename error, for example when the source variable is absent in this wave
            capture rename `s'_`let'_`w' `s'`let'_`w'
        }
    }
}

**reshape
* wide to long: one row per id and wave; the numeric suffix of each stub becomes the variable wave
reshape long ///
    gender_ race_ educ_ newsint_ marstat_ pid7_ ideo5_ relig_pew_ faminc_ ///
    votereg_ cbsa3_ regzip_ regzip_post_ countyfips_ ///
    hispanic_ hispanic_post_ employ_ immstat_ ///
    _322a_ _322b_ _322c_ _322d_ _324_ _327_ ///
    _422a_ _422b_ _423aa_ _423bb_ ///
    religpew_ birthyr_ ///
    , i(id) j(wave)

**rename waves
* consecutive wave counter (10 -> 1, 12 -> 2, 14 -> 3) so that L. steps back exactly one wave
gen wave_index = cond(wave == 10, 1, cond(wave == 12, 2, cond(wave == 14, 3, .)))

**panel
* declare the panel: respondent id by wave counter
tsset id wave_index

sort id wave_index

**Create lag for hispanic in last wave and two waves ago
* L. and L2. follow the tsset panel, so a lag never crosses from one respondent to another
by id: generate race_lag1 = L.race_  // previous wave
by id: generate race_lag2 = L2.race_  // two waves ago

* Coding shared by every flag in this section:
*   .  first row of a respondent (no earlier wave to compare against)
*   0  later row, no switch detected
*   1  later row, switch detected
* The zero-fill runs under by id: so that _n counts rows within a respondent.
* Without the prefix, _n is the row number of the whole dataset, which zero-filled
* every first-wave row except the very first row of the file.
* Race code 3 is the Hispanic category throughout; code 6 is excluded as origin or
* destination (its label is not shown in this file).

**Sees if they have changed race to or from hispanic
* reverse_attritor: not Hispanic last wave, Hispanic now
generate reverse_attritor = .
by id: replace reverse_attritor = 0 if _n > 1
by id: replace reverse_attritor = 1 if (race_ == 3 & race_lag1 != 3) & _n > 1 & !missing(race_) & !missing(race_lag1) & race_lag1 != 6

* attritor: Hispanic last wave, something else now
generate attritor = .
by id: replace attritor = 0 if _n > 1
by id: replace attritor = 1 if (race_ != 3 & race_lag1 == 3) & _n > 1 & !missing(race_) & !missing(race_lag1) & race_ != 6

generate attritor_broad = .
by id: replace attritor_broad = 0 if _n > 1
**attritors who racially identify as hispanic and then stop
by id: replace attritor_broad = 1 if (race_ != 3 & race_lag1 == 3) & _n > 1 & !missing(race_) & !missing(race_lag1) & race_ != 6
**attritors who claim hispanic heritage (but not hispanic racial id) and then stop
by id: replace attritor_broad = 1 if (hispanic_ != 1 & L.hispanic_ == 1) & _n > 1 & !missing(race_) & !missing(race_lag1) & race_ != 3 

**people who say they are racially something else but ethnically hispanic
**RECODE
generate race_heritage_attritor = .
by id: replace race_heritage_attritor = 0 if _n > 1
by id: replace race_heritage_attritor = 1 if (race_ != 3 & race_lag1 == 3) & (hispanic_ == 1) & !missing(race_) & !missing(race_lag1) & race_ != 6

**changed definition on 1/5/26!!
* white_attritor: attritor whose current race code is 1
generate white_attritor = .
by id: replace white_attritor = 0 if _n > 1
by id: replace white_attritor = 1 if attritor == 1 & race_ == 1
* Back fill: mark all PRIOR observations for anyone who becomes an attritor
/*sort id wave
by id: egen ever_white_attritor = max(white_attritor)
replace white_attritor = ever_white_attritor
drop ever_white_attritor*/

**changed definition on 1/5/26
* black_attritor: attritor whose current race code is 2
* the zero-fill condition used to be _n != 0, which is true on every row; it now
* matches the other flags so first-wave rows stay missing
generate black_attritor = .
by id: replace black_attritor = 0 if _n > 1
by id: replace black_attritor = 1 if attritor == 1 & race_ == 2
* Back fill: mark all PRIOR observations for anyone who becomes an attritor
/*sort id wave
by id: egen ever_black_attritor = max(black_attritor)
replace black_attritor = ever_black_attritor
drop ever_black_attritor*/

generate white_attritor_broad = .
by id: replace white_attritor_broad = 0 if _n > 1
replace white_attritor_broad = 1 if attritor_broad == 1 & race_ == 1
* Back fill: mark all PRIOR observations for anyone who becomes an attritor
/*sort id wave
by id: egen ever_broad_attritor = max(attritor_broad)
replace attritor_broad = ever_broad_attritor
drop ever_broad_attritor*/

generate black_attritor_broad = .
by id: replace black_attritor_broad = 0 if _n > 1
replace black_attritor_broad = 1 if attritor_broad == 1 & race_ == 2


**hispanic variable
* 1 when the row shows any Hispanic signal: race code 3, flagged as an attritor
* in this wave, or hispanic_ equal to 1; otherwise 0
generate hispanic_all = (race_ == 3) | (attritor == 1) | (hispanic_ == 1)

**catholicism variable
* 1 when religpew_ is 2, otherwise 0 (missing religpew_ also gives 0)
generate catholic = (religpew_ == 2)

**recode for missingness for resentment scales first
* Codes 8 and 9 are turned into missing on all four items.
* _422a_ was listed twice before, so _422b_ was never cleaned and its 8/9 codes
* were added into racial_resentment. The target is the bare missing value .
* rather than the quoted string ".".
recode _422a_ _422b_ _423aa_ _423bb_ (8 9 = .)

**rename 422a and 422b is the resentment variable scale > 422a is should work way up
* reverse 422a so both items run in the same direction, then sum them
recode _422a_ (1=5) (2=4) (3=3) (4=2) (5=1), gen(rev_422a_)
generate racial_resentment = rev_422a_ + _422b_

**create 422aa and 422bb for hispanic resentment > 422aa is should work way up
* same construction for the Hispanic-targeted items
recode _423aa_ (1=5) (2=4) (3=3) (4=2) (5=1), gen(rev_423aa_)
generate hispanic_resentment = rev_423aa_ + _423bb_

******
******recode for missingness and rescale 0 to 1
******

**rescale both resentment sums to 0-1
* for each score: store the sample minimum and maximum as constant columns,
* then compute (score - min) / (max - min)
local scores   racial_resentment hispanic_resentment
local prefixes resent_scale hispresent_scale
forvalues k = 1/2 {
    local score  : word `k' of `scores'
    local prefix : word `k' of `prefixes'
    egen `prefix'_min = min(`score')
    egen `prefix'_max = max(`score')
    gen `score'_rescaled = (`score' - `prefix'_min) / (`prefix'_max - `prefix'_min)
}


****gender should be treated as a categorical variable from now on: use i.gender
****should be using faminc, not employ

**recode cit1 0-1
**recoded immstat on 1/23 so non-citizen was 0 and imm citizen was 0.25
* swap codes 1 and 2 before rescaling
recode immstat_ (1=2) (2=1)

* min-max rescaling: <stem>_rescaled = (x - min) / (max - min), with the sample
* minimum and maximum kept as constant columns <stem>_min and <stem>_max
* NOTE(review): no non-substantive codes are removed before taking min and max
* for these variables - confirm none are present in the data
foreach stem in immstat faminc educ {
    egen `stem'_min = min(`stem'_)
    egen `stem'_max = max(`stem'_)
    gen `stem'_rescaled = (`stem'_ - `stem'_min) / (`stem'_max - `stem'_min)
}

**age_* and birthyr_* are both built from birthyr_ and hold the same values
foreach stem in age birthyr {
    egen `stem'_min = min(birthyr_)
    egen `stem'_max = max(birthyr_)
    gen `stem'_rescaled = (birthyr_ - `stem'_min) / (`stem'_max - `stem'_min)
}

**recode pid7
* codes 8, 98 and 99 are left out of the min, the max and the rescaled score
egen pid7_min = min(pid7_) if !inlist(pid7_, 8, 98, 99)
egen pid7_max = max(pid7_) if !inlist(pid7_, 8, 98, 99)
gen pid7_rescaled = (pid7_ - pid7_min) / (pid7_max - pid7_min) if !inlist(pid7_, 8, 98, 99)

**recode marstat and employ
foreach stem in marstat employ {
    egen `stem'_min = min(`stem'_)
    egen `stem'_max = max(`stem'_)
    gen `stem'_rescaled = (`stem'_ - `stem'_min) / (`stem'_max - `stem'_min)
}

**recode ideology
*put the last category into the first to code it out as missing later
* Category 6 is folded into category 1 for the rescaled score only, so it still
* rescales to the bottom of the range. The fold is done on a temporary copy:
* recoding ideo5_ in place erased every 6, and the ideo5_missing flag built
* further down tests ideo5_ == 6, so it could never mark those respondents.
recode ideo5_ (6=1), gen(ideo5_folded)
egen ideo5_min = min(ideo5_folded)
egen ideo5_max = max(ideo5_folded)
gen ideo5_rescaled = (ideo5_folded - ideo5_min) / (ideo5_max - ideo5_min)
drop ideo5_folded

**create a variable for changing resentment
**want one for a change and one for seeing if they attrit after they have high resentment
**control for changing attitudes generally 

**tsset id wave

tsset id wave_index

**create a resentment lag variable
* previous-wave racial resentment on the 0-1 scale
gen rr_lag = L.racial_resentment_rescaled

* wave-to-wave change; missing when either wave is missing
gen change_rr = racial_resentment_rescaled - L.racial_resentment_rescaled
 
**do positive change
* increases only: 0 when resentment fell or stayed flat. A missing change also
* satisfies > 0 in Stata, so those rows receive the missing value of change_rr.
gen pos_change_rr = 0
replace pos_change_rr = change_rr if change_rr > 0 

**high resentment variable
* 1 above the midpoint, 0 at or below it, missing when the score is missing.
* Stata ranks missing above every number, so the bare > 0.5 test used to flag
* respondents with no score as high resentment.
gen high_resentment = (racial_resentment_rescaled > 0.5) if !missing(racial_resentment_rescaled)


*************************************
**hispanic resentment too
**create a resentment lag variable
gen hr_lag = L.hispanic_resentment_rescaled

gen change_hr = hispanic_resentment_rescaled - L.hispanic_resentment_rescaled

**high resentment variable
* same midpoint rule and same missing-value guard as high_resentment
gen high_resentment_hr = (hispanic_resentment_rescaled > 0.5) if !missing(hispanic_resentment_rescaled)

****make inconsistency variable
***rescale race
egen race_min = min(race_)
egen race_max = max(race_)
gen race_rescaled = (race_ - race_min) / (race_max - race_min)

**code changes in variables
* each change_* is current wave minus previous wave; missing when either side is missing
gen change_immstat = immstat_rescaled - L.immstat_rescaled
gen change_faminc = faminc_rescaled - L.faminc_rescaled
gen change_pid7 = pid7_rescaled - L.pid7_rescaled
gen change_gender = gender - L.gender
gen change_catholic = catholic - L.catholic
gen change_race = race_rescaled - L.race_rescaled
gen change_resentment = racial_resentment_rescaled - L.racial_resentment_rescaled

**code actual change variable
* Number of the seven tracked items that differ from the previous wave (0 to 7).
* Only observed changes are counted. In Stata a missing value is not equal to 0,
* so the bare != 0 test scored every missing change as an inconsistency: first-wave
* rows came out as 7 and any item missing in either wave added 1.
* First-wave rows now come out as 0, which is what the old recode of 9 to 0 was
* meant to do; that recode could never match because only seven items are summed.
gen inconsistent_response = 0
foreach item in immstat faminc pid7 gender catholic race resentment {
    replace inconsistent_response = inconsistent_response + 1 if change_`item' != 0 & !missing(change_`item')
}
***Is inconsistency a bad control? I'm worried it might simply eliminate the effect, because inconsistency is the thing I'm curious about rather than something I need to control for - but I thought that one paper was interesting.

**code for missingness
**marstat missing, pid7 missing, faminc missing, immstat missing
* each flag is 1 when the rescaled score is missing, otherwise 0
gen marstat_missing = (marstat_rescaled == .)

* pid7 is also flagged when the raw code is 8, 98 or 99
gen pid7_missing = (pid7_rescaled == .) | inlist(pid7_, 8, 98, 99)

gen faminc_missing = (faminc_rescaled == .)

gen immstat_missing = (immstat_rescaled == .)

**recoded ideo5 on 1/23 so not sure = 0 instead of 1
* ideology is flagged from the raw variable: missing, or code 6
gen ideo5_missing = (ideo5_ == .) | (ideo5_ == 6)


* flagged rows get a rescaled value of 0, so they stay in the estimation sample
* alongside their missing indicator
foreach stem in marstat pid7 faminc immstat {
    replace `stem'_rescaled = 0 if `stem'_missing == 1
}

**fix marstat so it's a variable to show if youve been married
* 1 when the rescaled marital status is below 0.8; rows whose missing status was
* just set to 0 therefore also receive 1
gen married_before = (marstat_rescaled < 0.8)

* outreg2 is a user-written command. Install it from SSC only when it cannot be
* found, so the do-file also runs offline and does not stop when a copy is
* already installed.
capture which outreg2
if _rc ssc install outreg2

*wave index makes it 1 to 3 instead of 10 to 14
tsset id wave_index
* declare the sampling weight used by every svy: command below
svyset [pweight=weight]

**descriptive statistics
* unweighted pie chart of the race categories with percent labels
graph pie, over(race_) plabel(_all percent, size(medlarge))

* survey-weighted one-way tables
foreach v in race_ birthyr_ gender faminc_ immstat_ educ_ marstat_ hispanic_all {
    svy: tab `v'
}

**thesis power analysis
**find mean for hispanic
* pass 1 is unweighted, pass 2 repeats the same two means under the svy prefix;
* each mean is followed by its standard deviation
forvalues pass = 1/2 {
    local pre
    if `pass' == 2 local pre "svy:"
    `pre' mean racial_resentment_rescaled if hispanic_all == 1 & attritor == 0
    estat sd
    `pre' mean racial_resentment_rescaled if white_attritor == 1
    estat sd
}
**use 1737 stats test for minimal detectable effect
power twomeans 0.636, power(0.8) n(35 55 75 95 115 135 155) sd(0.2508367) graph

* group means and standard deviations shared by the two achieved-power runs
local grp_means 0.6666602 0.6361243
local grp_sds   sd1(0.2615) sd2(0.31851)

**power actual N
power twomeans `grp_means', `grp_sds' n1(75) n2(558) graph
**power = 0.15
**power svy N
power twomeans `grp_means', `grp_sds' n1(107) n2(895) graph
**power = 0.19

* minimum detectable difference at 80 percent power
power twomeans 0.616, sd1(0.2921) sd2(0.327) n1(75) n2(558) power(0.8) 
**delta = 0.1031, m2 = 0.7191
**for svy adjusted
power twomeans 0.6361, `grp_sds' n1(107) n2(895) power(0.8)
**delta = 0.0774, m2 = 0.7135

**thesis descriptive stats
* two-way tables against the white-attritor flag, Hispanic rows only
foreach v in immstat_ faminc_ {
    svy: tab `v' white_attritor if hispanic_all == 1
}
* one-way tables: white attritors first, then Hispanic rows with white_attritor equal to 0
foreach v in faminc_ marstat_ educ_ catholic racial_resentment_rescaled {
    svy: tab `v' if white_attritor == 1
    svy: tab `v' if white_attritor == 0 & hispanic_all == 1
}
svy: mean racial_resentment_rescaled if white_attritor == 1

**proposal descriptive stats
* each covariate tabulated for high-resentment rows (1) and then the rest (0), Hispanic rows only
foreach v in educ_ faminc_ marstat_ immstat_ catholic {
    foreach lvl in 1 0 {
        svy: tab `v' high_resentment if high_resentment == `lvl' & hispanic_all == 1
    }
}

***thesis Model A
* bivariate: white attrition on previous-wave racial resentment, Hispanic rows only
svy: regress white_attritor L.racial_resentment_rescaled if hispanic_all == 1
outreg2 using "regtable2.doc"

* building blocks reused by the models below
* hisp: sample restriction; base: lagged controls common to Models B and C
local hisp "if hispanic_all == 1"
local base L.immstat_rescaled L.faminc_rescaled L.married_before L.educ_rescaled L.gender L.birthyr_rescaled L.catholic L.marstat_missing L.pid7_missing L.faminc_missing L.immstat_missing

***thesis Model(s) B
svy: regress white_attritor rr_lag `base' `hisp'
outreg2 using "regtable__3.doc"

* same model with the lag of the unrescaled resentment sum
svy: regress white_attritor L.racial_resentment `base' `hisp'

***thesis Model(s) C
* adds lagged party id
svy: regress white_attritor rr_lag L.pid7_rescaled `base' `hisp'
outreg2 using "regtable_56.doc"

* adds lagged ideology and its missing flag
svy: regress white_attritor rr_lag L.ideo5_rescaled `base' L.ideo5_missing `hisp'
outreg2 using "regtable_57.doc"

* adds both
svy: regress white_attritor rr_lag L.pid7_rescaled L.ideo5_rescaled `base' L.ideo5_missing `hisp'
outreg2 using "regtable5.doc"

*svy: regress white_attritor L.racial_resentment L.pid7_rescaled L.ideo5_rescaled L.immstat_rescaled L.faminc_rescaled L.married_before L.educ_rescaled L.gender L.birthyr_rescaled L.catholic L.marstat_missing L.pid7_missing L.faminc_missing L.immstat_missing if hispanic_all == 1

***thesis Model D
svy: regress white_attritor hispanic_resentment_rescaled

svy: regress white_attritor L.hispanic_resentment_rescaled `hisp'
svy: regress white_attritor hispanic_resentment_rescaled `hisp'

* exports the most recent fit, which is the same-wave model just above
outreg2 using "regtable6.doc"

***thesis Model E
* same-wave Hispanic resentment with same-wave controls
local levels hispanic_resentment_rescaled pid7_rescaled ideo5_rescaled immstat_rescaled faminc_rescaled married_before educ_rescaled gender birthyr_rescaled catholic marstat_missing pid7_missing faminc_missing immstat_missing ideo5_missing
svy: regress white_attritor `levels' `hisp'
outreg2 using "regtable3A.doc"

***thesis Model F
* Model E with every regressor lagged one wave
local lagged
foreach v of local levels {
    local lagged `lagged' L.`v'
}
svy: regress white_attritor `lagged' `hisp'
outreg2 using "regtable3A.doc"



***thesis controls
*
**broad attrition
* robustness runs with the broad white-attritor outcome
*wave index makes it 1 to 3 instead of 10 to 14
tsset id wave_index
svyset [pweight=weight]

local hisp "if hispanic_all == 1"

*model c
local lag_c rr_lag L.pid7_rescaled L.immstat_rescaled L.faminc_rescaled L.married_before L.educ_rescaled L.gender L.birthyr_rescaled L.catholic L.marstat_missing L.pid7_missing L.faminc_missing L.immstat_missing
svy: regress white_attritor_broad `lag_c' `hisp'
outreg2 using "regtable3Arob.doc"

*model e
local lvl_e hispanic_resentment_rescaled pid7_rescaled ideo5_rescaled immstat_rescaled faminc_rescaled married_before educ_rescaled gender birthyr_rescaled catholic marstat_missing pid7_missing faminc_missing immstat_missing ideo5_missing
svy: regress white_attritor_broad `lvl_e' `hisp'
outreg2 using "regtable3AArob.doc"

tab white_attritor_broad

***
***
***correlation analysis
***
***
***vif
* variance inflation factors from unweighted refits of the thesis models
* hisp: sample restriction; base: lagged controls common to Models B and C
local hisp "if hispanic_all == 1"
local base L.immstat_rescaled L.faminc_rescaled L.married_before L.educ_rescaled L.gender L.birthyr_rescaled L.catholic L.marstat_missing L.pid7_missing L.faminc_missing L.immstat_missing

***thesis Model A
regress white_attritor L.racial_resentment_rescaled `hisp'
vif

***thesis Model(s) B
regress white_attritor rr_lag `base' `hisp'
vif


regress white_attritor L.racial_resentment `base' `hisp'
vif

***thesis Model(s) C
regress white_attritor rr_lag L.pid7_rescaled `base' `hisp'
vif

regress white_attritor rr_lag L.ideo5_rescaled `base' L.ideo5_missing `hisp'
vif

regress white_attritor rr_lag L.pid7_rescaled L.ideo5_rescaled `base' L.ideo5_missing `hisp'
vif

***thesis Model D
* full-sample fit, no vif requested
regress white_attritor hispanic_resentment_rescaled

regress white_attritor L.hispanic_resentment_rescaled `hisp'
vif

***thesis Model E
local levels hispanic_resentment_rescaled pid7_rescaled ideo5_rescaled immstat_rescaled faminc_rescaled married_before educ_rescaled gender birthyr_rescaled catholic marstat_missing pid7_missing faminc_missing immstat_missing ideo5_missing
regress white_attritor `levels' `hisp'
vif

***thesis Model F
* Unweighted refit of Model F for the VIF check. Hispanic resentment is lagged
* here as in the survey-weighted Model F; the same-wave version used before
* did not match the model it was meant to diagnose.
regress white_attritor L.hispanic_resentment_rescaled L.pid7_rescaled L.ideo5_rescaled L.immstat_rescaled L.faminc_rescaled L.married_before L.educ_rescaled L.gender L.birthyr_rescaled L.catholic L.marstat_missing L.pid7_missing L.faminc_missing L.immstat_missing L.ideo5_missing if hispanic_all == 1
vif






* survey-weighted fit, kept for its coefficients
svy: regress white_attritor L.racial_resentment L.pid7_rescaled L.ideo5_rescaled L.immstat_rescaled L.faminc_rescaled L.married_before L.educ_rescaled L.gender L.birthyr_rescaled L.catholic L.marstat_missing L.pid7_missing L.faminc_missing L.immstat_missing if hispanic_all == 1
* vif is not run after the svy fit: it is not supported for svy estimation
* results and stops the do-file. The unweighted refit below supplies the VIFs.

***vif with unweighted estimates
regress white_attritor L.racial_resentment L.pid7_rescaled L.ideo5_rescaled L.immstat_rescaled L.faminc_rescaled L.married_before L.educ_rescaled L.gender L.birthyr_rescaled L.catholic L.marstat_missing L.pid7_missing L.faminc_missing L.immstat_missing if hispanic_all == 1
vif




* pairwise correlations among the lagged regressors, full sample

* lagged racial resentment against each lagged control
foreach v in pid7_rescaled ideo5_rescaled immstat_rescaled faminc_rescaled married_before educ_rescaled gender birthyr_rescaled catholic marstat_missing {
    corr rr_lag L.`v'
}

* lagged party id against the other controls
foreach v in ideo5_rescaled educ_rescaled immstat_rescaled faminc_rescaled married_before gender birthyr_rescaled catholic {
    corr L.pid7_rescaled L.`v'
}

* every remaining pair, each variable against the ones listed after it
local tri immstat_rescaled faminc_rescaled married_before educ_rescaled gender birthyr_rescaled catholic marstat_missing
local n_tri : word count `tri'
local last_a = `n_tri' - 1
forvalues a = 1/`last_a' {
    local first : word `a' of `tri'
    local start_b = `a' + 1
    forvalues b = `start_b'/`n_tri' {
        local second : word `b' of `tri'
        corr L.`first' L.`second'
    }
}

***
***
***hispanic_all correlation analysis
* the same pairwise correlations, restricted to rows with hispanic_all equal to 1

* lagged racial resentment against each lagged control
foreach v in pid7_rescaled ideo5_rescaled immstat_rescaled faminc_rescaled married_before educ_rescaled gender birthyr_rescaled catholic marstat_missing {
    corr rr_lag L.`v' if hispanic_all == 1
}

* lagged party id against the other controls
foreach v in ideo5_rescaled educ_rescaled immstat_rescaled faminc_rescaled married_before gender birthyr_rescaled catholic {
    corr L.pid7_rescaled L.`v' if hispanic_all == 1
}

* every remaining pair, each variable against the ones listed after it
local tri immstat_rescaled faminc_rescaled married_before educ_rescaled gender birthyr_rescaled catholic marstat_missing
local n_tri : word count `tri'
local last_a = `n_tri' - 1
forvalues a = 1/`last_a' {
    local first : word `a' of `tri'
    local start_b = `a' + 1
    forvalues b = `start_b'/`n_tri' {
        local second : word `b' of `tri'
        corr L.`first' L.`second' if hispanic_all == 1
    }
}


***
***thesis random chance simulator
***
***
* Wave-specific attritor flags; the counts noted below feed the simulation parameters.
* Coding: missing on the first row of a respondent, 0 on later rows without a
* switch, 1 on the row where the switch away from race code 3 is observed.
* The zero-fill runs under by id: so that _n counts rows within a respondent;
* without the prefix, _n is the row number of the whole dataset and every
* first-wave row except the first row of the file was set to 0.
generate attritor_2012 = .
by id: replace attritor_2012 = 0 if _n > 1
by id: replace attritor_2012 = 1 if (wave == 12 & race_ != 3 & race_lag1 == 3) & _n > 1 & !missing(race_) & !missing(race_lag1) & race_ != 6

generate attritor_2014 = .
by id: replace attritor_2014 = 0 if _n > 1
by id: replace attritor_2014 = 1 if (wave == 14 & race_ != 3 & race_lag1 == 3) & _n > 1 & !missing(race_) & !missing(race_lag1) & race_ != 6 & wave != 10

* white attritors: switched away from code 3 and report race code 1 in that wave
generate white_attritor_2012 = .
by id: replace white_attritor_2012 = 0 if _n > 1
replace white_attritor_2012 = 1 if (attritor_2012 == 1 & race_ == 1 & wave == 12)
**(36)
generate white_attritor_2014 = .
by id: replace white_attritor_2014 = 0 if _n > 1
replace white_attritor_2014 = 1 if (attritor_2014 == 1 & race_ == 1 & wave == 14)
**(39)
**(39)

**create variables for race by year/wave
* each column holds race_ on the rows of its own wave and is missing on all other rows
generate race_2014 = cond(wave == 14, race_, .)
generate race_2012 = cond(wave == 12, race_, .)
generate race_2010 = cond(wave == 10, race_, .)

**make everyone's race their race in 2014. this assumes everyone's true race is their race in 2014, simulating random error
* NOTE(review): race_2014 is filled on wave-14 rows only, so the three simulated
* columns start out missing on the wave-10 and wave-12 rows - confirm whether the
* 2014 value was meant to be copied to every row of the respondent
foreach yr in 2010 2012 2014 {
    gen race_sim_`yr' = race_2014
}
***
***
***attrition
* simulated attritor flag, starting at 0 on every row
gen byte attritor_sim = 0

* set to 1 on rows after the first of a respondent when race_2014 is not 3, the
* simulated 2012 or 2010 race is 3, and race_ is observed and not 6
by id: replace attritor_sim = 1 if _n > 1 ///
    & race_2014 != 3 ///
    & (race_sim_2012 == 3 | race_sim_2010 == 3) ///
    & !missing(race_) & race_ != 6

* white_attritor_sim: simulated attritor whose race_2014 is 1
******this could be wrong bc before it was race_ > changed to race_2014
gen byte white_attritor_sim = (attritor_sim == 1 & race_2014 == 1)

***
***
***


**hispanic based on simulation 
**first need to fix hispanic_sim so that it's the simulated race
* 1 when any of these holds on the row: race_2014 is 3, a simulated race column
* is 3, the row is a simulated attritor, or hispanic_ is 1; otherwise 0
**idk if hispanic would capture hispanic simulation > I think this is just not something that is varied by the simulation bc some people just get double counted and that is fine, don't count hispanic question anyway
		generate hispanic_all_sim = (race_2014 == 3) | (race_sim_2012 == 3) | (race_sim_2010 == 3) | (attritor_sim == 1) | (hispanic_ == 1)

**code: very stata heavy!!
* Random-relabelling benchmark for the lagged racial-resentment model.
* Each pass works on a preserved copy of the data: it relabels randomly drawn
* rows with race_2014 == 1 as code 3 in race_sim_2010 and race_sim_2012, rebuilds
* the simulated attritor and Hispanic flags, refits the survey-weighted model and
* adds the coefficient on L.racial_resentment_rescaled to a running total.
* The mean over all passes is displayed after the loop.
* NOTE(review): race_2014 is created earlier as race_ if wave == 14, so every
* race_2014 == 1 test below can only match wave-14 rows - confirm this is intended.
set more off
set seed 123  // for reproducibility

* -----------------------------
* simulation parameters
* -----------------------------
local n_sim = 100                 // number of simulations
local white_to_hisp_2012 = 36     // rows relabelled in race_sim_2010 (presumably the 2012 white-attritor count noted earlier - confirm)
local white_to_hisp_2014 = 39     // rows relabelled in race_sim_2012 (presumably the 2014 white-attritor count noted earlier - confirm)

* scalars to accumulate coefficients across simulations
* res_hisp is initialised here but never updated inside this loop
scalar res_white = 0
scalar res_hisp = 0

* -----------------------------
* main simulation loop
* -----------------------------
forvalues i = 1/`n_sim' {

    * -----------------------------
    * WHITE SUBSET SIMULATION
    * -----------------------------
    * every change made below is undone by the restore at the end of the pass
    preserve
	* uniform draw for rows with race_2014 == 1; all other rows get missing
        gen u = runiform() if race_2014 == 1
        * randomly pick white respondents to turn Hispanic in 2010
        * missing values sort last, so the rows that received a draw come first
        sort u                  
        *bysort: replace race_sim_2010 = 3 in 1/`white_to_hisp_2012' // 
		*if race_2014 == 1 
		 * running count of rows with a draw; the first white_to_hisp_2012 of them are relabelled
		gen temp_rank = sum(!missing(u))
		replace race_sim_2010 = 3 if temp_rank <= `white_to_hisp_2012' & ///  
		race_2014 == 1
		drop temp_rank
        * do the same for 2012, with a fresh independent draw
        gen u2 = runiform() if race_2014 == 1
        sort u2
		 * the first white_to_hisp_2014 rows with a draw are relabelled in race_sim_2012
		gen temp_rank2 = sum(!missing(u2))
		replace race_sim_2012 = 3 if temp_rank2 <= `white_to_hisp_2014' & ///
		race_2014 == 1
		drop temp_rank2
        *bysort: replace race_sim_2012 = 3 in 1/`white_to_hisp_2014' //
		*if race_2014 == 1
		**attrition simulator
		* rebuild the flag from zero for this pass
		drop attritor_sim
		generate attritor_sim = 0
		sort id
		* flagged when race_2014 is not 3 but a simulated earlier race is 3,
		* on rows after the first of the respondent, with race_ observed and not 6
		by id: replace attritor_sim = 1 if (race_2014 != 3 & race_sim_2012 == 3) & ///
		_n > 1 & !missing(race_) & !missing(race_sim_2012) & race_ != 6
		by id: replace attritor_sim = 1 if (race_2014 != 3 & race_sim_2010 == 3) & ///
		_n > 1 & !missing(race_) & !missing(race_sim_2010) & race_ != 6
		* first version of the outcome; it is dropped and rebuilt a few lines below
		drop white_attritor_sim 
		generate white_attritor_sim = 0 
		replace white_attritor_sim = 1 if attritor_sim == 1 & race_2014 == 1
		* Back fill manually
		* Instead of marking attrition when it happens, mark ALL
		*observations for people who ever attrit
		sort id wave
		* ever_attritor is the maximum of attritor_sim across the rows of a respondent
		by id: egen ever_attritor = max(attritor_sim)
		drop white_attritor_sim
		* final outcome: 1 only when the respondent has a flagged row and this row
		* itself carries race_sim_2010 == 3
		gen white_attritor_sim = (ever_attritor == 1 & race_sim_2010 == 3)
				
				
		**hispanic based on simulation 
		* only adds 1s; rows already equal to 1 before the pass stay 1
		replace hispanic_all_sim = 1 if race_2014 == 3 
		replace hispanic_all_sim = 1 if race_sim_2012 == 3 
		replace hispanic_all_sim = 1 if race_sim_2010 == 3 
		replace hispanic_all_sim = 1 if attritor_sim == 1
		replace hispanic_all_sim = 1 if hispanic_ == 1
        * refit the lagged racial-resentment model on the simulated outcome
		sort id
		tsset id wave_index
		svyset [pweight=weight]
		svy: regress white_attritor_sim L.racial_resentment_rescaled L.pid7_rescaled L.immstat_rescaled L.faminc_rescaled L.married_before L.educ_rescaled L.gender L.birthyr_rescaled L.catholic L.marstat_missing L.pid7_missing L.faminc_missing L.immstat_missing if hispanic_all_sim == 1
		
        * add the coefficient on lagged racial resentment to the running total
		scalar coef_temp = _b[L.racial_resentment_rescaled]
        scalar res_white = res_white + coef_temp
    restore
}
* mean coefficient over all passes
display "average coef white->hispanic: " res_white/`n_sim'
 

***
***
***

**code: very stata heavy!!
* Random-relabelling benchmark for the same-wave Hispanic-resentment model.
* Each pass works on a preserved copy of the data: it relabels randomly drawn
* rows with race_2014 == 1 as code 3 in race_sim_2010 and race_sim_2012, rebuilds
* the simulated attritor and Hispanic flags, refits the survey-weighted model and
* adds the coefficient on hispanic_resentment_rescaled to a running total.
* The mean over all passes is displayed after the loop.
* NOTE(review): race_2014 is created earlier as race_ if wave == 14, so every
* race_2014 == 1 test below can only match wave-14 rows - confirm this is intended.
set more off
set seed 123  // for reproducibility

* -----------------------------
* simulation parameters
* -----------------------------
local n_sim = 100                 // number of simulations
local white_to_hisp_2012 = 36     // rows relabelled in race_sim_2010 (presumably the 2012 white-attritor count noted earlier - confirm)
local white_to_hisp_2014 = 39     // rows relabelled in race_sim_2012 (presumably the 2014 white-attritor count noted earlier - confirm)

* scalars to accumulate coefficients across simulations
* res_white is reset to 0 here; res_hisp is initialised but never updated inside this loop
scalar res_white = 0
scalar res_hisp = 0

* -----------------------------
* main simulation loop
* -----------------------------
forvalues i = 1/`n_sim' {

    * -----------------------------
    * WHITE SUBSET SIMULATION
    * -----------------------------
    * every change made below is undone by the restore at the end of the pass
    preserve
	* uniform draw for rows with race_2014 == 1; all other rows get missing
        gen u = runiform() if race_2014 == 1
        * randomly pick white respondents to turn Hispanic in 2010
        * missing values sort last, so the rows that received a draw come first
        sort u                  
        *bysort: replace race_sim_2010 = 3 in 1/`white_to_hisp_2012' // 
		*if race_2014 == 1 
		 * running count of rows with a draw; the first white_to_hisp_2012 of them are relabelled
		gen temp_rank = sum(!missing(u))
		replace race_sim_2010 = 3 if temp_rank <= `white_to_hisp_2012' & ///  
		race_2014 == 1
		drop temp_rank
        * do the same for 2012, with a fresh independent draw
        gen u2 = runiform() if race_2014 == 1
        sort u2
		 * the first white_to_hisp_2014 rows with a draw are relabelled in race_sim_2012
		gen temp_rank2 = sum(!missing(u2))
		replace race_sim_2012 = 3 if temp_rank2 <= `white_to_hisp_2014' & ///
		race_2014 == 1
		drop temp_rank2
        *bysort: replace race_sim_2012 = 3 in 1/`white_to_hisp_2014' //
		*if race_2014 == 1
		**attrition simulator
		* rebuild the flag from zero for this pass
		drop attritor_sim
		generate attritor_sim = 0
		sort id
		* flagged when race_2014 is not 3 but a simulated earlier race is 3,
		* on rows after the first of the respondent, with race_ observed and not 6
		by id: replace attritor_sim = 1 if (race_2014 != 3 & race_sim_2012 == 3) & ///
		_n > 1 & !missing(race_) & !missing(race_sim_2012) & race_ != 6
		by id: replace attritor_sim = 1 if (race_2014 != 3 & race_sim_2010 == 3) & ///
		_n > 1 & !missing(race_) & !missing(race_sim_2010) & race_ != 6
		* first version of the outcome; it is dropped and rebuilt a few lines below
		drop white_attritor_sim 
		generate white_attritor_sim = 0 
		replace white_attritor_sim = 1 if attritor_sim == 1 & race_2014 == 1
		* Back fill manually
		* Instead of marking attrition when it happens, mark ALL
		*observations for people who ever attrit
		sort id wave
		* ever_attritor is the maximum of attritor_sim across the rows of a respondent
		by id: egen ever_attritor = max(attritor_sim)
		drop white_attritor_sim
		* final outcome: 1 only when the respondent has a flagged row and this row
		* itself carries race_sim_2010 == 3
		gen white_attritor_sim = (ever_attritor == 1 & race_sim_2010 == 3)
				
				
		**hispanic based on simulation 
		* only adds 1s; rows already equal to 1 before the pass stay 1
		replace hispanic_all_sim = 1 if race_2014 == 3 
		replace hispanic_all_sim = 1 if race_sim_2012 == 3 
		replace hispanic_all_sim = 1 if race_sim_2010 == 3 
		replace hispanic_all_sim = 1 if attritor_sim == 1
		replace hispanic_all_sim = 1 if hispanic_ == 1
        * refit the same-wave Hispanic-resentment model on the simulated outcome
		sort id
		tsset id wave_index
		svyset [pweight=weight]
		svy: regress white_attritor_sim hispanic_resentment_rescaled pid7_rescaled ideo5_rescaled immstat_rescaled faminc_rescaled married_before educ_rescaled gender birthyr_rescaled catholic marstat_missing pid7_missing faminc_missing immstat_missing ideo5_missing if hispanic_all_sim == 1
		
        * add the coefficient on Hispanic resentment to the running total
		scalar coef_temp = _b[hispanic_resentment_rescaled]
        scalar res_white = res_white + coef_temp
    restore
}
* mean coefficient over all passes
display "average coef white->hispanic: " res_white/`n_sim'
 










*****thesis average compared to white people analysis
*regression for variable reference svy: regress white_attritor L.racial_resentment L.pid7_rescaled L.ideo5_rescaled L.immstat_rescaled L.faminc_rescaled L.married_before L.educ_rescaled L.gender L.birthyr_rescaled L.catholic L.marstat_missing L.pid7_missing L.faminc_missing L.immstat_missing if hispanic_all == 1

****Goal of this part: (1) compute the average white respondent's value on several metrics, (2) create variables measuring how far each person's response is from that average, and (3) combine those distances into an index of how close each person is to the white average (by summing or averaging the distances).

**most important identity characteristics: income, education, catholic status, immigration status
**most important ideologies: resentment, partisanship, ideology 
* Reference group for every mean below: rows with race_ == 1 and attritor == 0.
local white_ref race_ == 1 & attritor == 0

* --- family income (rows with faminc_ above 17 are left out) ---
svy: mean faminc_rescaled if `white_ref' & faminc_ <= 17
svy: mean faminc_ if `white_ref' & faminc_ <= 17
* raw-scale mean recorded by the author: 7.003
tabulate faminc_ if `white_ref' & faminc_ <= 17
* Constant typed in by hand; presumably the rescaled mean reported above.
* Re-check it whenever the data change.
generate mean_wyt_faminc = 0.0635364


* --- education ---
svy: mean educ_ if `white_ref'
tabulate educ_ if `white_ref'
svy: mean educ_rescaled if `white_ref'
* raw-scale mean recorded by the author: 3.419
* (some college to slightly more than some college)
generate mean_wyt_educ = 0.4838452


* --- catholic indicator ---
svy: mean catholic if `white_ref'
tabulate catholic if `white_ref'
* mean recorded by the author: 0.19 (not catholic)
generate mean_wyt_catholic = 0.19

* --- immigration status ---
svy: mean immstat_ if `white_ref'
tabulate immstat_ if `white_ref'
* raw-scale mean recorded by the author: 4.557884 (between second and third gen)
svy: mean immstat_rescaled if `white_ref'
generate mean_wyt_immstat = 0.8878203

* Reference group for every mean below: rows with race_ == 1 and attritor == 0.
local white_ref race_ == 1 & attritor == 0

* --- racial resentment ---
svy: mean racial_resentment_rescaled if `white_ref'
tabulate racial_resentment_rescaled if `white_ref'
* mean recorded by the author: 0.6905473
generate mean_wyt_rr = 0.6905473

* --- ideology ---
* Copy of the rescaled ideology item that is missing wherever ideo5_ equals 6,
* then min-max rescaled again over the remaining values.
generate ideo5_2 = ideo5_rescaled if ideo5_ != 6
egen ideo5_2min = min(ideo5_2)
egen ideo5_2max = max(ideo5_2)
generate ideo5_2rescaled = (ideo5_2 - ideo5_2min) / (ideo5_2max - ideo5_2min)

svy: mean ideo5_2rescaled if `white_ref' & ideo5_ != 6
tabulate ideo5_2rescaled if `white_ref'

* mean recorded by the author: 0.582931 (moderate > conservative)
generate mean_wyt_ideo5_2 = 0.582931

* --- partisanship ---
svy: mean pid7_rescaled if `white_ref'
tabulate pid7_ if `white_ref'
* mean recorded by the author: 0.515881
generate mean_wyt_pid7 = 0.515881

***
***
***now i need to find the difference between each person and the average

* Distance of each respondent from the white reference constants.
* Three parallel lists, matched by position: the short label used in the new
* variable name, the respondent-level measure, and the reference constant.
local labels faminc educ catholic immstat rr ideo5 pid7
local values faminc_rescaled educ_rescaled catholic immstat_rescaled racial_resentment_rescaled ideo5_2rescaled pid7_rescaled
local refs   mean_wyt_faminc mean_wyt_educ mean_wyt_catholic mean_wyt_immstat mean_wyt_rr mean_wyt_ideo5_2 mean_wyt_pid7
local n_items : word count `labels'

* Signed distance: respondent value minus the reference constant.
forvalues k = 1/`n_items' {
    local lab : word `k' of `labels'
    local val : word `k' of `values'
    local ref : word `k' of `refs'
    generate wyt_dist_`lab' = `val' - `ref'
}

* Absolute distance, built in a second pass so the signed variables all come
* first in the dataset.
forvalues k = 1/`n_items' {
    local lab : word `k' of `labels'
    local val : word `k' of `values'
    local ref : word `k' of `refs'
    generate abs_wyt_dist_`lab' = abs(`val' - `ref')
}

***make identity index
***The index adds up the identity distance variables so that together they form a single scale.

* Identity index from the four identity items: income, education, catholic,
* immigration status. Larger values mean farther from the white reference
* constants. A row is missing on an index if any component is missing.
local signed_parts wyt_dist_faminc + wyt_dist_educ + wyt_dist_catholic + wyt_dist_immstat
local abs_parts abs_wyt_dist_faminc + abs_wyt_dist_educ + abs_wyt_dist_catholic + abs_wyt_dist_immstat

* Sum of signed distances (opposite signs can cancel).
generate identity_similarity = `signed_parts'

* Sum of absolute distances.
generate abs_identity_similarity = `abs_parts'

* Mean absolute distance over the four items.
generate avg_identity_similarity = (`abs_parts')/4

***make the same thing but for beliefs
* Belief index from the three belief items: racial resentment, ideology,
* partisanship. Larger values mean farther from the white reference constants.
* A row is missing on an index if any component is missing.

* Sum of signed distances (opposite signs can cancel).
gen belief_similarity = wyt_dist_rr + wyt_dist_ideo5 + wyt_dist_pid7

* Sum of absolute distances.
gen abs_belief_similarity = abs_wyt_dist_rr + abs_wyt_dist_ideo5 + abs_wyt_dist_pid7

* Mean absolute distance. There are three components, so divide by 3.
* Dividing by 4, as the identity index does for its four items, understates
* the average and puts the two indexes on different scales.
gen avg_belief_similarity = (abs_wyt_dist_rr + abs_wyt_dist_ideo5 + abs_wyt_dist_pid7)/3

**new regression to see if similarity influences attrition

* Does distance from the white reference profile predict white_attritor?
* Controls shared by the models below, lagged one wave and unlagged.
local lag_controls L.married_before L.gender L.birthyr_rescaled L.marstat_missing L.pid7_missing L.faminc_missing L.immstat_missing
local now_controls married_before gender birthyr_rescaled marstat_missing pid7_missing faminc_missing immstat_missing

* NOTE(review): these models restrict the sample with an if qualifier under
* svy. Stata's survey documentation recommends subpop() for subpopulation
* standard errors. Confirm which is intended before reporting them.

* Model 1: lagged mean-absolute-distance indexes plus lagged controls.
svy: regress white_attritor L.avg_belief_similarity L.avg_identity_similarity `lag_controls' if hispanic_all == 1

* Model 2: the same two indexes, no controls, no sample restriction.
svy: regress white_attritor L.avg_belief_similarity L.avg_identity_similarity

* Model 3: lagged summed-absolute-distance indexes plus lagged controls.
svy: regress white_attritor L.abs_belief_similarity L.abs_identity_similarity `lag_controls' if hispanic_all == 1

* Model 4: lagged signed-distance indexes plus lagged controls.
svy: regress white_attritor L.belief_similarity L.identity_similarity `lag_controls' if hispanic_all == 1

* Model 5: same-wave summed-absolute-distance indexes plus same-wave controls.
svy: regress white_attritor abs_belief_similarity abs_identity_similarity `now_controls' if hispanic_all == 1







